Boston Housing dataset (UCI)

Dataset URL: https://www.kaggle.com/apratim87/housingdata/data


In [1]:
import shutil
import math
import multiprocessing
from datetime import datetime

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

import tensorflow as tf
from tensorflow import data
from tensorflow.python.feature_column import feature_column

print(tf.__version__)


/Users/khalidsalama/anaconda/lib/python3.6/importlib/_bootstrap.py:205: RuntimeWarning: compiletime version 3.5 of module 'tensorflow.python.framework.fast_tensor_util' does not match runtime version 3.6
  return f(*args, **kwds)
1.4.1

In [2]:
MODEL_NAME = 'housing-price-model-01'

DATA_FILE = 'data/housingdata.csv'

TRAIN_DATA_FILES_PATTERN = 'data/housing-train-01.csv'
TEST_DATA_FILES_PATTERN = 'data/housing-test-01.csv'

RESUME_TRAINING = False
PROCESS_FEATURES = True
EXTEND_FEATURE_COLUMNS = True
MULTI_THREADING = True

Define Dataset Metadata


In [3]:
HEADER = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV']

HEADER_DEFAULTS = [[0.0],[0.0],[0.0],['NA'],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0]]

NUMERIC_FEATURE_NAMES = ['CRIM', 'ZN','INDUS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT']
CATEGORICAL_FEATURE_NAMES_WITH_VOCABULARY = {'CHAS':['0', '1']}
CATEGORICAL_FEATURE_NAMES = list(CATEGORICAL_FEATURE_NAMES_WITH_VOCABULARY.keys())

FEATURE_NAMES = NUMERIC_FEATURE_NAMES + CATEGORICAL_FEATURE_NAMES

TARGET_NAME = 'MEDV'

UNUSED_FEATURE_NAMES = list(set(HEADER) - set(FEATURE_NAMES) - {TARGET_NAME})

print("Header: {}".format(HEADER))
print("Numeric Features: {}".format(NUMERIC_FEATURE_NAMES))
print("Categorical Features: {}".format(CATEGORICAL_FEATURE_NAMES))
print("Target: {}".format(TARGET_NAME))
print("Unused Features: {}".format(UNUSED_FEATURE_NAMES))


Header: ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV']
Numeric Features: ['CRIM', 'ZN', 'INDUS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT']
Categorical Features: ['CHAS']
Target: MEDV
Unused Features: []

Load and Analyse Dataset


In [4]:
housing_dataset = pd.read_csv(DATA_FILE, header=None, names=HEADER)
housing_dataset.head()


Out[4]:
CRIM ZN INDUS CHAS NOX RM AGE DIS RAD TAX PTRATIO B LSTAT MEDV
0 0.00632 18.0 2.31 0 0.538 6.575 65.2 4.0900 1 296 15.3 396.90 4.98 24.0
1 0.02731 0.0 7.07 0 0.469 6.421 78.9 4.9671 2 242 17.8 396.90 9.14 21.6
2 0.02729 0.0 7.07 0 0.469 7.185 61.1 4.9671 2 242 17.8 392.83 4.03 34.7
3 0.03237 0.0 2.18 0 0.458 6.998 45.8 6.0622 3 222 18.7 394.63 2.94 33.4
4 0.06905 0.0 2.18 0 0.458 7.147 54.2 6.0622 3 222 18.7 396.90 5.33 36.2

Visualise and Explore


In [5]:
plt.close('all')
plt.figure(figsize=(20, 30))
n_rows = 5
n_cols = 3

for col_index in range(len(housing_dataset.columns)):
    
    bins = 50
    
    feature_name = HEADER[col_index]
    
    plt.subplot(n_rows,n_cols,col_index+1)

    plt.title("{} Histogram".format(feature_name))
    series = housing_dataset[feature_name]
    
    if feature_name in ['CRIM']:
        series = np.log(series+0.01)        
   
    if feature_name in ['ZN','RAD']:
        bins = 10
    
    plt.hist(series, bins=bins)
    plt.xlabel(feature_name)
    plt.ylabel("Frequency")



In [6]:
plt.close('all')
plt.figure(figsize=(20, 30))
n_rows = 5
n_cols = 3

for col_index in range(len(FEATURE_NAMES)):
    
    feature_name = FEATURE_NAMES[col_index]
    plt.subplot(n_rows, n_cols, col_index+1)
    plt.title("{} vs MEDV".format(feature_name))
    
    if feature_name in NUMERIC_FEATURE_NAMES:
        
        series = housing_dataset[feature_name]
    
        if feature_name == 'CRIM':
            series = np.log(series+0.01)
             
        plt.scatter(series,housing_dataset.MEDV)
        
    else:
        
        feature_vocab = CATEGORICAL_FEATURE_NAMES_WITH_VOCABULARY[feature_name]
        
        feature_values = []
        
        for v in feature_vocab:
            feature_values = feature_values + [housing_dataset.MEDV[housing_dataset[feature_name] == int(v)].values]

        plt.boxplot(feature_values)
        
    plt.xlabel(feature_name)
    plt.ylabel("MEDV")


Prepare Training and Test Sets


In [7]:
DATA_SIZE = len(housing_dataset)

print("Dataset size: {}".format(DATA_SIZE))

train_data = housing_dataset.sample(frac=0.70, random_state = 19830610)
test_data = housing_dataset[~housing_dataset.index.isin(train_data.index)]

TRAIN_DATA_SIZE = len(train_data)
TEST_DATA_SIZE = len(test_data)

print("Train set size: {}".format(TRAIN_DATA_SIZE))
print("Test set size: {}".format(TEST_DATA_SIZE))
print("")


Dataset size: 506
Train set size: 354
Test set size: 152

Compute Scaling Statistics for Numeric Columns


In [8]:
means = train_data[NUMERIC_FEATURE_NAMES].mean(axis=0)
stdvs = train_data[NUMERIC_FEATURE_NAMES].std(axis=0)
maxs = train_data[NUMERIC_FEATURE_NAMES].max(axis=0)
mins = train_data[NUMERIC_FEATURE_NAMES].min(axis=0)
df_stats = pd.DataFrame({"mean":means, "stdv":stdvs, "max":maxs, "min":mins})
df_stats.head(15)


Out[8]:
max mean min stdv
CRIM 88.9762 3.680534 0.00632 8.706143
ZN 100.0000 11.336158 0.00000 23.175461
INDUS 27.7400 10.912542 0.46000 6.848749
NOX 0.8710 0.552093 0.38500 0.114226
RM 8.7800 6.269175 3.56100 0.720181
AGE 100.0000 68.344068 6.00000 27.850865
DIS 12.1265 3.796611 1.12960 2.126096
RAD 24.0000 9.579096 1.00000 8.732779
TAX 711.0000 409.087571 187.00000 168.960346
PTRATIO 22.0000 18.505085 12.60000 2.150993
B 396.9000 358.707006 0.32000 88.017787
LSTAT 37.9700 12.724322 1.73000 7.156888

Explore Variability of Target Variable

The RMSE of a constant predictor that always outputs the mean training price gives a naive baseline; a trained model should beat it.


In [9]:
price_mean = train_data.MEDV.mean()
train_rmse = np.sqrt(np.mean(np.square(train_data.MEDV - price_mean)))
test_rmse = np.sqrt(np.mean(np.square(test_data.MEDV - price_mean)))

print("Mean Price: {}".format(round(price_mean,3)))
print("Train RMSE: {}".format(round(train_rmse,3)))
print("Test RMSE: {}".format(round(test_rmse,3)))


Mean Price: 22.275
Train RMSE: 8.958
Test RMSE: 9.714

Save Training/Test Data and Scaling Stats


In [10]:
train_data.to_csv(path_or_buf=TRAIN_DATA_FILES_PATTERN, header=False, index=False)
test_data.to_csv(path_or_buf=TEST_DATA_FILES_PATTERN, header=False, index=False)
df_stats.to_csv(path_or_buf="data/housing-stats.csv", header=True, index=True)

pd.read_csv(TRAIN_DATA_FILES_PATTERN, header=None, names=HEADER).head()


Out[10]:
CRIM ZN INDUS CHAS NOX RM AGE DIS RAD TAX PTRATIO B LSTAT MEDV
0 0.12802 0.0 8.56 0 0.5200 6.474 97.1 2.4329 5 384 20.9 395.24 12.27 19.8
1 0.32982 0.0 21.89 0 0.6240 5.822 95.4 2.4699 4 437 21.2 388.69 15.03 18.4
2 0.43571 0.0 10.59 1 0.4890 5.344 100.0 3.8750 4 277 18.6 396.90 23.09 20.0
3 0.03466 35.0 6.06 0 0.4379 6.031 23.3 6.6407 1 304 16.9 362.25 7.83 19.4
4 14.23620 0.0 18.10 0 0.6930 6.343 100.0 1.5741 24 666 20.2 396.90 20.32 7.2

Define Data Input Functions

a. Parsing and preprocessing logic


In [11]:
def parse_csv_row(csv_row):
    
    columns = tf.decode_csv(csv_row, record_defaults=HEADER_DEFAULTS)
    features = dict(zip(HEADER, columns))
    
    for column in UNUSED_FEATURE_NAMES:
        features.pop(column)
    
    target = features.pop(TARGET_NAME)

    return features, target


def process_features(features):
    
    # log-transform CRIM to compress its long right tail (mirroring the exploration plots above)
    features['CRIM'] = tf.log(features['CRIM'] + 0.01)
    # clip B into the [300, 500] range to pull in its low-end outliers
    features['B'] = tf.clip_by_value(features['B'], clip_value_min=300, clip_value_max=500)
    
    return features
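
As a quick sanity check (an addition, not part of the recorded run), the parsing and preprocessing logic can be exercised on a single hand-written CSV row inside a throwaway graph. The row values below are copied from the first record shown in Out[4]:

# Sanity-check parse_csv_row + process_features on one row (TF 1.x graph/session).
with tf.Graph().as_default():
    sample_row = tf.constant('0.00632,18.0,2.31,0,0.538,6.575,65.2,4.0900,1,296,15.3,396.90,4.98,24.0')
    features, target = parse_csv_row(sample_row)
    features = process_features(features)
    with tf.Session() as sess:
        parsed_features, parsed_target = sess.run((features, target))
        print(parsed_features['CRIM'], parsed_features['CHAS'], parsed_target)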

b. Data pipeline input function


In [12]:
def csv_input_fn(files_name_pattern, mode=tf.estimator.ModeKeys.EVAL, 
                 skip_header_lines=0, 
                 num_epochs=None, 
                 batch_size=200):
    
    shuffle = (mode == tf.estimator.ModeKeys.TRAIN)
    
    num_threads = multiprocessing.cpu_count() if MULTI_THREADING else 1
    
    print("")
    print("* data input_fn:")
    print("================")
    print("Input file(s): {}".format(files_name_pattern))
    print("Batch size: {}".format(batch_size))
    print("Epoch Count: {}".format(num_epochs))
    print("Mode: {}".format(mode))
    print("Thread Count: {}".format(num_threads))
    print("Shuffle: {}".format(shuffle))
    print("================")
    print("")
    
    file_names = tf.matching_files(files_name_pattern)

    dataset = data.TextLineDataset(filenames=file_names)
    dataset = dataset.skip(skip_header_lines)
    
    if shuffle:
        dataset = dataset.shuffle(buffer_size=2 * batch_size + 1)
    
    dataset = dataset.batch(batch_size)
    dataset = dataset.map(lambda csv_row: parse_csv_row(csv_row),  num_parallel_calls=num_threads)
    
    if PROCESS_FEATURES:
        dataset = dataset.map(lambda features, target: (process_features(features), target),  
                              num_parallel_calls=num_threads)
    
    dataset = dataset.repeat(num_epochs)
    iterator = dataset.make_one_shot_iterator()
    
    features, target = iterator.get_next()
    return features, target
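
One observation on the pipeline above: it batches the raw text lines before mapping parse_csv_row over them, so tf.decode_csv parses a whole batch of CSV rows per call. The per-row ordering sketched below would be equivalent, at the cost of one decode_csv call per row:

# Equivalent per-row ordering (sketch only; the recorded run uses batch-then-map):
# dataset = data.TextLineDataset(filenames=file_names)
# dataset = dataset.map(parse_csv_row, num_parallel_calls=num_threads)
# dataset = dataset.batch(batch_size)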

In [13]:
features, target = csv_input_fn(files_name_pattern="")
print("Features in CSV: {}".format(list(features.keys())))
print("Target in CSV: {}".format(target))


* data input_fn:
================
Input file(s): 
Batch size: 200
Epoch Count: None
Mode: eval
Thread Count: 4
Shuffle: False
================

Features in CSV: ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT']
Target in CSV: Tensor("IteratorGetNext:13", shape=(?,), dtype=float32)

Define Feature Columns

a. Load scaling stats


In [14]:
df_stats = pd.read_csv("data/housing-stats.csv", header=0, index_col=0)
# the stats were computed and saved in NUMERIC_FEATURE_NAMES order, so this assignment lines up row-by-row
df_stats['feature_name'] = NUMERIC_FEATURE_NAMES
df_stats.head()


Out[14]:
max mean min stdv feature_name
CRIM 88.9762 3.680534 0.00632 8.706143 CRIM
ZN 100.0000 11.336158 0.00000 23.175461 ZN
INDUS 27.7400 10.912542 0.46000 6.848749 INDUS
NOX 0.8710 0.552093 0.38500 0.114226 NOX
RM 8.7800 6.269175 3.56100 0.720181 RM

b. Create Feature Columns


In [15]:
def extend_feature_columns(feature_columns, hparams):
    
    
    ZN_bucketized = tf.feature_column.bucketized_column(
        feature_columns['ZN'],
        boundaries = [0,10,100])
        
    RAD_bucketized = tf.feature_column.bucketized_column(
        feature_columns['RAD'],
        boundaries = [0,10,25] )
        
    TAX_bucketized = tf.feature_column.bucketized_column(
        feature_columns['TAX'],
        boundaries = [0,200,300,500,800])
        
    ZN_bucketized_X_RAD_bucketized = tf.feature_column.crossed_column([ZN_bucketized, RAD_bucketized], 4)
    ZN_bucketized_X_TAX_bucketized = tf.feature_column.crossed_column([ZN_bucketized, TAX_bucketized], 8)
    RAD_bucketized_X_TAX_bucketized = tf.feature_column.crossed_column([RAD_bucketized, TAX_bucketized], 8)
        
    feature_columns['ZN_bucketized'] = ZN_bucketized
    feature_columns['RAD_bucketized'] = RAD_bucketized
    feature_columns['TAX_bucketized'] = TAX_bucketized
    
    feature_columns['ZN_bucketized_X_RAD_bucketized'] = ZN_bucketized_X_RAD_bucketized
    feature_columns['ZN_bucketized_X_TAX_bucketized'] = ZN_bucketized_X_TAX_bucketized
    feature_columns['RAD_bucketized_X_TAX_bucketized'] = RAD_bucketized_X_TAX_bucketized
  
    return feature_columns

def standard_scaler(x, mean, stdv):
    return (x-mean)/(stdv)

def maxmin_scaler(x, max_value, min_value):
    return (x-min_value)/(max_value-min_value)

def get_feature_columns(hparams):
    
    numeric_columns = {}
    
    for feature_name in NUMERIC_FEATURE_NAMES:
        
        feature_max = df_stats[df_stats.feature_name == feature_name]['max'].values[0]
        feature_min = df_stats[df_stats.feature_name == feature_name]['min'].values[0]
        # bind the per-feature stats as lambda default arguments; a plain closure would be
        # late-bound, so every column would end up scaled with the last feature's stats
        normalizer_fn = lambda x, mx=feature_max, mn=feature_min: maxmin_scaler(x, mx, mn)

#         feature_mean = df_stats[df_stats.feature_name == feature_name]['mean'].values[0]
#         feature_stdv = df_stats[df_stats.feature_name == feature_name]['stdv'].values[0]
#         normalizer_fn = lambda x, mu=feature_mean, sd=feature_stdv: standard_scaler(x, mu, sd)
        
        numeric_columns[feature_name] = tf.feature_column.numeric_column(feature_name, 
                                                                         #normalizer_fn=normalizer_fn
                                                                        )
    CONSTRUCTED_NUMERIC_FEATURES_NAMES = []
    
    if PROCESS_FEATURES:
        for feature_name in CONSTRUCTED_NUMERIC_FEATURES_NAMES:
            numeric_columns[feature_name] = tf.feature_column.numeric_column(feature_name)

    categorical_column_with_vocabulary = \
        {key: tf.feature_column.categorical_column_with_vocabulary_list(key, vocabulary)
         for key, vocabulary in CATEGORICAL_FEATURE_NAMES_WITH_VOCABULARY.items()}
        
    feature_columns = {}

    if numeric_columns is not None:
        feature_columns.update(numeric_columns)

    if categorical_column_with_vocabulary is not None:
        feature_columns.update(categorical_column_with_vocabulary)
    
    if EXTEND_FEATURE_COLUMNS:
        feature_columns = extend_feature_columns(feature_columns, hparams)
        
    return feature_columns

feature_columns = get_feature_columns(tf.contrib.training.HParams(num_buckets=5,embedding_size=3))
print("Feature Columns: {}".format(feature_columns))


Feature Columns: {'CRIM': _NumericColumn(key='CRIM', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), 'ZN': _NumericColumn(key='ZN', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), 'INDUS': _NumericColumn(key='INDUS', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), 'NOX': _NumericColumn(key='NOX', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), 'RM': _NumericColumn(key='RM', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), 'AGE': _NumericColumn(key='AGE', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), 'DIS': _NumericColumn(key='DIS', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), 'RAD': _NumericColumn(key='RAD', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), 'TAX': _NumericColumn(key='TAX', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), 'PTRATIO': _NumericColumn(key='PTRATIO', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), 'B': _NumericColumn(key='B', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), 'LSTAT': _NumericColumn(key='LSTAT', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), 'CHAS': _VocabularyListCategoricalColumn(key='CHAS', vocabulary_list=('0', '1'), dtype=tf.string, default_value=-1, num_oov_buckets=0), 'ZN_bucketized': _BucketizedColumn(source_column=_NumericColumn(key='ZN', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), boundaries=(0, 10, 100)), 'RAD_bucketized': _BucketizedColumn(source_column=_NumericColumn(key='RAD', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), boundaries=(0, 10, 25)), 'TAX_bucketized': _BucketizedColumn(source_column=_NumericColumn(key='TAX', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), boundaries=(0, 200, 300, 500, 800)), 'ZN_bucketized_X_RAD_bucketized': _CrossedColumn(keys=(_BucketizedColumn(source_column=_NumericColumn(key='ZN', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), boundaries=(0, 10, 100)), _BucketizedColumn(source_column=_NumericColumn(key='RAD', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), boundaries=(0, 10, 25))), hash_bucket_size=4, hash_key=None), 'ZN_bucketized_X_TAX_bucketized': _CrossedColumn(keys=(_BucketizedColumn(source_column=_NumericColumn(key='ZN', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), boundaries=(0, 10, 100)), _BucketizedColumn(source_column=_NumericColumn(key='TAX', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), boundaries=(0, 200, 300, 500, 800))), hash_bucket_size=8, hash_key=None), 'RAD_bucketized_X_TAX_bucketized': _CrossedColumn(keys=(_BucketizedColumn(source_column=_NumericColumn(key='RAD', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), boundaries=(0, 10, 25)), _BucketizedColumn(source_column=_NumericColumn(key='TAX', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), boundaries=(0, 200, 300, 500, 800))), hash_bucket_size=8, hash_key=None)}

Define a DNN Estimator Creation Function

a. Get wide and deep feature columns


In [16]:
def get_wide_deep_columns():
    
    feature_columns = list(get_feature_columns(hparams).values())
    
    dense_columns = list(
        filter(lambda column: isinstance(column, feature_column._NumericColumn) or
                              isinstance(column, feature_column._EmbeddingColumn),
               feature_columns
        )
    )

    categorical_columns = list(
        filter(lambda column: isinstance(column, feature_column._VocabularyListCategoricalColumn) or
                              isinstance(column, feature_column._BucketizedColumn),
               feature_columns)
    )
    
    sparse_columns = list(
        filter(lambda column: isinstance(column, feature_column._HashedCategoricalColumn) or
                              isinstance(column, feature_column._CrossedColumn),
               feature_columns)
    )

    indicator_columns = list(
            map(lambda column: tf.feature_column.indicator_column(column),
                categorical_columns)
    )
    
    deep_feature_columns = dense_columns + indicator_columns
    wide_feature_columns = categorical_columns + sparse_columns
    
    return wide_feature_columns, deep_feature_columns
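
A throwaway way to eyeball the split (an addition, not part of the recorded run; it assumes hparams has already been defined, as in the run-configuration cell further below):

wide_columns, deep_columns = get_wide_deep_columns()
print("Wide column count: {}".format(len(wide_columns)))
print("Deep column count: {}".format(len(deep_columns)))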

b. Define the estimator


In [17]:
def create_DNNComb_estimator(run_config, hparams, print_desc=False):
    
    wide_feature_columns, deep_feature_columns = get_wide_deep_columns()
    
    estimator = tf.estimator.DNNLinearCombinedRegressor(
        
        dnn_feature_columns = deep_feature_columns,
        linear_feature_columns = wide_feature_columns,
        
        dnn_hidden_units= hparams.hidden_units,
        
        dnn_optimizer= tf.train.AdamOptimizer(),
        
        dnn_activation_fn= tf.nn.relu,
        
        config= run_config
    )
    
    
    if print_desc:
        print("")
        print("*Estimator Type:")
        print("================")
        print(type(estimator))
        print("")
        print("*deep columns:")
        print("==============")
        print(deep_feature_columns)
        print("")
        print("wide columns:")
        print("=============")
        print(wide_feature_columns)
        print("")
    
    return estimator

Run Experiment

a. Set HParams and RunConfig


In [18]:
TRAIN_SIZE = TRAIN_DATA_SIZE
NUM_EPOCHS = 10000
BATCH_SIZE = 177
EVAL_AFTER_SEC = 30
TOTAL_STEPS = (TRAIN_SIZE/BATCH_SIZE)*NUM_EPOCHS

hparams  = tf.contrib.training.HParams(
    num_epochs = NUM_EPOCHS,
    batch_size = BATCH_SIZE,
    hidden_units=[16, 8, 4],
    max_steps = TOTAL_STEPS
)

model_dir = 'trained_models/{}'.format(MODEL_NAME)

run_config = tf.estimator.RunConfig(
    log_step_count_steps=1000,
    tf_random_seed=19830610,
    model_dir=model_dir
)

print(hparams)
print("Model Directory:", run_config.model_dir)
print("")
print("Dataset Size:", TRAIN_SIZE)
print("Batch Size:", BATCH_SIZE)
print("Steps per Epoch:",TRAIN_SIZE/BATCH_SIZE)
print("Total Steps:", TOTAL_STEPS)
print("That is 1 evaluation step after each",EVAL_AFTER_SEC," training seconds")


[('batch_size', 177), ('hidden_units', [16, 8, 4]), ('max_steps', 20000.0), ('num_epochs', 10000)]
Model Directory: trained_models/housing-price-model-01

Dataset Size: 354
Batch Size: 177
Steps per Epoch: 2.0
Total Steps: 20000.0
That is one evaluation after every 30 training seconds

b. Define TrainSpec and EvalSpec


In [19]:
train_spec = tf.estimator.TrainSpec(
    input_fn = lambda: csv_input_fn(
        TRAIN_DATA_FILES_PATTERN,
        mode = tf.estimator.ModeKeys.TRAIN,
        num_epochs=hparams.num_epochs,
        batch_size=hparams.batch_size
    ),
    max_steps=hparams.max_steps,
    hooks=None
)

eval_spec = tf.estimator.EvalSpec(
    input_fn = lambda: csv_input_fn(
        TRAIN_DATA_FILES_PATTERN,  # evaluates on the training files; see the test-set variant below
        mode=tf.estimator.ModeKeys.EVAL,
        num_epochs=1,
        batch_size=hparams.batch_size
    ),
    throttle_secs = EVAL_AFTER_SEC,
    steps=None
)
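
Note that the eval spec above points at the training files, which the logs below confirm. A variant that monitors the held-out split instead could look like the sketch below (not used in the recorded run):

eval_spec_test = tf.estimator.EvalSpec(
    input_fn = lambda: csv_input_fn(
        TEST_DATA_FILES_PATTERN,
        mode=tf.estimator.ModeKeys.EVAL,
        num_epochs=1,
        batch_size=hparams.batch_size
    ),
    throttle_secs=EVAL_AFTER_SEC,
    steps=None
)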

c. Run Experiment via train_and_evaluate


In [20]:
if not RESUME_TRAINING:
    print("Removing previous artifacts...")
    shutil.rmtree(model_dir, ignore_errors=True)
else:
    print("Resuming training...") 

    
tf.logging.set_verbosity(tf.logging.INFO)

time_start = datetime.utcnow() 
print("Experiment started at {}".format(time_start.strftime("%H:%M:%S")))
print(".......................................") 

estimator = create_DNNComb_estimator(run_config, hparams, True)

tf.estimator.train_and_evaluate(
    estimator=estimator,
    train_spec=train_spec, 
    eval_spec=eval_spec
)

time_end = datetime.utcnow() 
print(".......................................")
print("Experiment finished at {}".format(time_end.strftime("%H:%M:%S")))
print("")
time_elapsed = time_end - time_start
print("Experiment elapsed time: {} seconds".format(time_elapsed.total_seconds()))


Removing previous artifacts...
Experiment started at 11:32:04
.......................................
INFO:tensorflow:Using config: {'_model_dir': 'trained_models/housing-price-model-01', '_tf_random_seed': 19830610, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 1000, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x11c2560f0>, '_task_type': 'worker', '_task_id': 0, '_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}

*Estimator Type:
================
<class 'tensorflow.python.estimator.canned.dnn_linear_combined.DNNLinearCombinedRegressor'>

*deep columns:
==============
[_NumericColumn(key='CRIM', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), _NumericColumn(key='ZN', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), _NumericColumn(key='INDUS', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), _NumericColumn(key='NOX', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), _NumericColumn(key='RM', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), _NumericColumn(key='AGE', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), _NumericColumn(key='DIS', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), _NumericColumn(key='RAD', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), _NumericColumn(key='TAX', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), _NumericColumn(key='PTRATIO', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), _NumericColumn(key='B', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), _NumericColumn(key='LSTAT', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), _IndicatorColumn(categorical_column=_VocabularyListCategoricalColumn(key='CHAS', vocabulary_list=('0', '1'), dtype=tf.string, default_value=-1, num_oov_buckets=0)), _IndicatorColumn(categorical_column=_BucketizedColumn(source_column=_NumericColumn(key='ZN', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), boundaries=(0, 10, 100))), _IndicatorColumn(categorical_column=_BucketizedColumn(source_column=_NumericColumn(key='RAD', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), boundaries=(0, 10, 25))), _IndicatorColumn(categorical_column=_BucketizedColumn(source_column=_NumericColumn(key='TAX', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), boundaries=(0, 200, 300, 500, 800)))]

wide columns:
=============
[_VocabularyListCategoricalColumn(key='CHAS', vocabulary_list=('0', '1'), dtype=tf.string, default_value=-1, num_oov_buckets=0), _BucketizedColumn(source_column=_NumericColumn(key='ZN', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), boundaries=(0, 10, 100)), _BucketizedColumn(source_column=_NumericColumn(key='RAD', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), boundaries=(0, 10, 25)), _BucketizedColumn(source_column=_NumericColumn(key='TAX', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), boundaries=(0, 200, 300, 500, 800)), _CrossedColumn(keys=(_BucketizedColumn(source_column=_NumericColumn(key='ZN', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), boundaries=(0, 10, 100)), _BucketizedColumn(source_column=_NumericColumn(key='RAD', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), boundaries=(0, 10, 25))), hash_bucket_size=4, hash_key=None), _CrossedColumn(keys=(_BucketizedColumn(source_column=_NumericColumn(key='ZN', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), boundaries=(0, 10, 100)), _BucketizedColumn(source_column=_NumericColumn(key='TAX', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), boundaries=(0, 200, 300, 500, 800))), hash_bucket_size=8, hash_key=None), _CrossedColumn(keys=(_BucketizedColumn(source_column=_NumericColumn(key='RAD', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), boundaries=(0, 10, 25)), _BucketizedColumn(source_column=_NumericColumn(key='TAX', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), boundaries=(0, 200, 300, 500, 800))), hash_bucket_size=8, hash_key=None)]

INFO:tensorflow:Running training and evaluation locally (non-distributed).
INFO:tensorflow:Start train and evaluate loop. The evaluate will happen after 30 secs (eval_spec.throttle_secs) or training is finished.

* data input_fn:
================
Input file(s): data/housing-train-01.csv
Batch size: 177
Epoch Count: 10000
Mode: train
Thread Count: 4
Shuffle: True
================

INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into trained_models/housing-price-model-01/model.ckpt.
INFO:tensorflow:loss = 112947.0, step = 1
INFO:tensorflow:loss = 11180.4, step = 101 (0.716 sec)
INFO:tensorflow:loss = 8257.91, step = 201 (0.410 sec)
INFO:tensorflow:loss = 8317.15, step = 301 (0.450 sec)
INFO:tensorflow:loss = 6641.9, step = 401 (0.391 sec)
INFO:tensorflow:loss = 6661.59, step = 501 (0.388 sec)
INFO:tensorflow:loss = 5468.6, step = 601 (0.445 sec)
INFO:tensorflow:loss = 4553.24, step = 701 (0.411 sec)
INFO:tensorflow:loss = 5301.4, step = 801 (0.393 sec)
INFO:tensorflow:loss = 4527.23, step = 901 (0.403 sec)
INFO:tensorflow:global_step/sec: 227.168
INFO:tensorflow:loss = 4091.83, step = 1001 (0.395 sec)
INFO:tensorflow:loss = 4163.75, step = 1101 (0.372 sec)
INFO:tensorflow:loss = 4866.37, step = 1201 (0.396 sec)
INFO:tensorflow:loss = 4840.62, step = 1301 (0.405 sec)
INFO:tensorflow:loss = 3631.64, step = 1401 (0.421 sec)
INFO:tensorflow:loss = 3680.43, step = 1501 (0.440 sec)
INFO:tensorflow:loss = 2978.92, step = 1601 (0.420 sec)
INFO:tensorflow:loss = 3469.06, step = 1701 (0.534 sec)
INFO:tensorflow:loss = 3610.71, step = 1801 (0.558 sec)
INFO:tensorflow:loss = 3449.1, step = 1901 (0.487 sec)
INFO:tensorflow:global_step/sec: 223.457
INFO:tensorflow:loss = 2949.65, step = 2001 (0.444 sec)
INFO:tensorflow:loss = 3002.09, step = 2101 (0.384 sec)
INFO:tensorflow:loss = 2078.43, step = 2201 (0.377 sec)
INFO:tensorflow:loss = 2718.58, step = 2301 (0.383 sec)
INFO:tensorflow:loss = 2830.31, step = 2401 (0.519 sec)
INFO:tensorflow:loss = 2751.09, step = 2501 (0.473 sec)
INFO:tensorflow:loss = 2699.22, step = 2601 (0.432 sec)
INFO:tensorflow:loss = 2658.68, step = 2701 (0.442 sec)
INFO:tensorflow:loss = 2116.29, step = 2801 (0.443 sec)
INFO:tensorflow:loss = 3203.14, step = 2901 (0.490 sec)
INFO:tensorflow:global_step/sec: 225.415
INFO:tensorflow:loss = 2925.51, step = 3001 (0.490 sec)
INFO:tensorflow:loss = 2662.44, step = 3101 (0.629 sec)
INFO:tensorflow:loss = 2311.81, step = 3201 (0.481 sec)
INFO:tensorflow:loss = 2869.73, step = 3301 (0.529 sec)
INFO:tensorflow:loss = 2016.4, step = 3401 (0.617 sec)
INFO:tensorflow:loss = 2454.77, step = 3501 (0.530 sec)
INFO:tensorflow:loss = 2170.73, step = 3601 (0.578 sec)
INFO:tensorflow:loss = 2275.68, step = 3701 (0.434 sec)
INFO:tensorflow:loss = 2864.94, step = 3801 (0.445 sec)
INFO:tensorflow:loss = 2163.78, step = 3901 (0.516 sec)
INFO:tensorflow:global_step/sec: 194.771
INFO:tensorflow:loss = 2010.61, step = 4001 (0.376 sec)
INFO:tensorflow:loss = 2204.34, step = 4101 (0.375 sec)
INFO:tensorflow:loss = 1916.48, step = 4201 (0.484 sec)
INFO:tensorflow:loss = 2182.72, step = 4301 (0.554 sec)
INFO:tensorflow:loss = 2275.26, step = 4401 (0.589 sec)
INFO:tensorflow:loss = 2104.09, step = 4501 (0.552 sec)
INFO:tensorflow:loss = 2619.23, step = 4601 (0.486 sec)
INFO:tensorflow:loss = 2115.97, step = 4701 (0.395 sec)
INFO:tensorflow:Saving checkpoints for 4780 into trained_models/housing-price-model-01/model.ckpt.
INFO:tensorflow:Loss for final step: 2158.66.

* data input_fn:
================
Input file(s): data/housing-train-01.csv
Batch size: 177
Epoch Count: 1
Mode: eval
Thread Count: 4
Shuffle: False
================

INFO:tensorflow:Starting evaluation at 2017-12-19-11:32:43
INFO:tensorflow:Restoring parameters from trained_models/housing-price-model-01/model.ckpt-4780
INFO:tensorflow:Finished evaluation at 2017-12-19-11:32:44
INFO:tensorflow:Saving dict for global step 4780: average_loss = 12.181, global_step = 4780, loss = 2156.04

* data input_fn:
================
Input file(s): data/housing-train-01.csv
Batch size: 177
Epoch Count: 10000
Mode: train
Thread Count: 4
Shuffle: True
================

INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Restoring parameters from trained_models/housing-price-model-01/model.ckpt-4780
INFO:tensorflow:Saving checkpoints for 4781 into trained_models/housing-price-model-01/model.ckpt.
INFO:tensorflow:loss = 2147.72, step = 4781
INFO:tensorflow:loss = 2143.41, step = 4881 (0.756 sec)
INFO:tensorflow:loss = 2097.17, step = 4981 (0.460 sec)
INFO:tensorflow:loss = 2294.74, step = 5081 (0.449 sec)
INFO:tensorflow:loss = 2066.6, step = 5181 (0.436 sec)
INFO:tensorflow:loss = 2521.42, step = 5281 (0.422 sec)
INFO:tensorflow:loss = 2014.43, step = 5381 (0.463 sec)
INFO:tensorflow:loss = 1640.02, step = 5481 (0.447 sec)
INFO:tensorflow:loss = 2587.91, step = 5581 (0.411 sec)
INFO:tensorflow:loss = 2549.08, step = 5681 (0.542 sec)
INFO:tensorflow:global_step/sec: 207.262
INFO:tensorflow:loss = 2164.01, step = 5781 (0.442 sec)
INFO:tensorflow:loss = 2377.39, step = 5881 (0.565 sec)
INFO:tensorflow:loss = 2261.73, step = 5981 (0.428 sec)
INFO:tensorflow:loss = 2336.98, step = 6081 (0.480 sec)
INFO:tensorflow:loss = 2048.8, step = 6181 (0.461 sec)
INFO:tensorflow:loss = 1427.18, step = 6281 (0.394 sec)
INFO:tensorflow:loss = 1627.19, step = 6381 (0.395 sec)
INFO:tensorflow:loss = 1932.48, step = 6481 (0.402 sec)
INFO:tensorflow:loss = 1831.71, step = 6581 (0.418 sec)
INFO:tensorflow:loss = 1904.88, step = 6681 (0.400 sec)
INFO:tensorflow:global_step/sec: 230.187
INFO:tensorflow:loss = 1850.07, step = 6781 (0.401 sec)
INFO:tensorflow:loss = 2052.73, step = 6881 (0.420 sec)
INFO:tensorflow:loss = 1313.79, step = 6981 (0.536 sec)
INFO:tensorflow:loss = 2123.96, step = 7081 (0.388 sec)
INFO:tensorflow:loss = 2490.77, step = 7181 (0.527 sec)
INFO:tensorflow:loss = 2268.26, step = 7281 (0.416 sec)
INFO:tensorflow:loss = 1958.27, step = 7381 (0.379 sec)
INFO:tensorflow:loss = 2029.33, step = 7481 (0.532 sec)
INFO:tensorflow:loss = 1419.27, step = 7581 (0.491 sec)
INFO:tensorflow:loss = 2379.73, step = 7681 (0.518 sec)
INFO:tensorflow:global_step/sec: 215.421
INFO:tensorflow:loss = 2311.26, step = 7781 (0.436 sec)
INFO:tensorflow:loss = 2090.93, step = 7881 (0.416 sec)
INFO:tensorflow:loss = 1828.97, step = 7981 (0.391 sec)
INFO:tensorflow:loss = 2340.82, step = 8081 (0.465 sec)
INFO:tensorflow:loss = 1656.72, step = 8181 (0.545 sec)
INFO:tensorflow:loss = 1855.88, step = 8281 (0.506 sec)
INFO:tensorflow:loss = 1760.0, step = 8381 (0.491 sec)
INFO:tensorflow:loss = 1758.81, step = 8481 (0.406 sec)
INFO:tensorflow:loss = 2011.16, step = 8581 (0.402 sec)
INFO:tensorflow:loss = 1583.61, step = 8681 (0.378 sec)
INFO:tensorflow:global_step/sec: 228.401
INFO:tensorflow:loss = 1590.63, step = 8781 (0.379 sec)
INFO:tensorflow:loss = 1726.34, step = 8881 (0.377 sec)
INFO:tensorflow:loss = 1467.94, step = 8981 (0.381 sec)
INFO:tensorflow:loss = 1734.8, step = 9081 (0.384 sec)
INFO:tensorflow:loss = 1798.57, step = 9181 (0.371 sec)
INFO:tensorflow:loss = 1573.02, step = 9281 (0.392 sec)
INFO:tensorflow:loss = 2120.81, step = 9381 (0.371 sec)
INFO:tensorflow:loss = 1707.76, step = 9481 (0.366 sec)
INFO:tensorflow:loss = 1758.16, step = 9581 (0.373 sec)
INFO:tensorflow:Saving checkpoints for 9601 into trained_models/housing-price-model-01/model.ckpt.
INFO:tensorflow:Loss for final step: 1750.02.

* data input_fn:
================
Input file(s): data/housing-train-01.csv
Batch size: 177
Epoch Count: 1
Mode: eval
Thread Count: 4
Shuffle: False
================

INFO:tensorflow:Starting evaluation at 2017-12-19-11:33:22
INFO:tensorflow:Restoring parameters from trained_models/housing-price-model-01/model.ckpt-9601
INFO:tensorflow:Finished evaluation at 2017-12-19-11:33:23
INFO:tensorflow:Saving dict for global step 9601: average_loss = 9.45206, global_step = 9601, loss = 1673.01

* data input_fn:
================
Input file(s): data/housing-train-01.csv
Batch size: 177
Epoch Count: 10000
Mode: train
Thread Count: 4
Shuffle: True
================

INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Restoring parameters from trained_models/housing-price-model-01/model.ckpt-9601
INFO:tensorflow:Saving checkpoints for 9602 into trained_models/housing-price-model-01/model.ckpt.
INFO:tensorflow:loss = 1611.15, step = 9602
INFO:tensorflow:loss = 1739.64, step = 9702 (0.869 sec)
INFO:tensorflow:loss = 1560.11, step = 9802 (0.469 sec)
INFO:tensorflow:loss = 1843.71, step = 9902 (0.486 sec)
INFO:tensorflow:loss = 1585.38, step = 10002 (0.390 sec)
INFO:tensorflow:loss = 2072.79, step = 10102 (0.552 sec)
INFO:tensorflow:loss = 1674.35, step = 10202 (0.383 sec)
INFO:tensorflow:loss = 1281.92, step = 10302 (0.379 sec)
INFO:tensorflow:loss = 2018.57, step = 10402 (0.379 sec)
INFO:tensorflow:loss = 2076.87, step = 10502 (0.378 sec)
INFO:tensorflow:global_step/sec: 214.478
INFO:tensorflow:loss = 1604.89, step = 10602 (0.379 sec)
INFO:tensorflow:loss = 1987.8, step = 10702 (0.383 sec)
INFO:tensorflow:loss = 1753.7, step = 10802 (0.467 sec)
INFO:tensorflow:loss = 1796.46, step = 10902 (0.478 sec)
INFO:tensorflow:loss = 1627.76, step = 11002 (0.398 sec)
INFO:tensorflow:loss = 1038.53, step = 11102 (0.386 sec)
INFO:tensorflow:loss = 1323.91, step = 11202 (0.396 sec)
INFO:tensorflow:loss = 1557.44, step = 11302 (0.371 sec)
INFO:tensorflow:loss = 1409.94, step = 11402 (0.380 sec)
INFO:tensorflow:loss = 1576.73, step = 11502 (0.373 sec)
INFO:tensorflow:global_step/sec: 249.763
INFO:tensorflow:loss = 1653.69, step = 11602 (0.372 sec)
INFO:tensorflow:loss = 1626.22, step = 11702 (0.374 sec)
INFO:tensorflow:loss = 1109.03, step = 11802 (0.370 sec)
INFO:tensorflow:loss = 1643.76, step = 11902 (0.371 sec)
INFO:tensorflow:loss = 1967.91, step = 12002 (0.375 sec)
INFO:tensorflow:loss = 1806.67, step = 12102 (0.382 sec)
INFO:tensorflow:loss = 1543.41, step = 12202 (0.375 sec)
INFO:tensorflow:loss = 1605.5, step = 12302 (0.375 sec)
INFO:tensorflow:loss = 1195.67, step = 12402 (0.371 sec)
INFO:tensorflow:loss = 1870.8, step = 12502 (0.371 sec)
INFO:tensorflow:global_step/sec: 264.817
INFO:tensorflow:loss = 1942.84, step = 12602 (0.412 sec)
INFO:tensorflow:loss = 1666.73, step = 12702 (0.382 sec)
INFO:tensorflow:loss = 1456.07, step = 12802 (0.398 sec)
INFO:tensorflow:loss = 1919.65, step = 12902 (0.398 sec)
INFO:tensorflow:loss = 1352.23, step = 13002 (0.405 sec)
INFO:tensorflow:loss = 1591.82, step = 13102 (0.432 sec)
INFO:tensorflow:loss = 1550.31, step = 13202 (0.380 sec)
INFO:tensorflow:loss = 1497.18, step = 13302 (0.369 sec)
INFO:tensorflow:loss = 1688.95, step = 13402 (0.403 sec)
INFO:tensorflow:loss = 1446.38, step = 13502 (0.378 sec)
INFO:tensorflow:global_step/sec: 255.398
INFO:tensorflow:loss = 1331.56, step = 13602 (0.371 sec)
INFO:tensorflow:loss = 1554.7, step = 13702 (0.392 sec)
INFO:tensorflow:loss = 1327.72, step = 13802 (0.405 sec)
INFO:tensorflow:loss = 1465.16, step = 13902 (0.399 sec)
INFO:tensorflow:loss = 1482.0, step = 14002 (0.393 sec)
INFO:tensorflow:loss = 1282.1, step = 14102 (0.423 sec)
INFO:tensorflow:loss = 1850.84, step = 14202 (0.588 sec)
INFO:tensorflow:loss = 1567.97, step = 14302 (0.384 sec)
INFO:tensorflow:loss = 1494.26, step = 14402 (0.387 sec)
INFO:tensorflow:loss = 1369.25, step = 14502 (0.405 sec)
INFO:tensorflow:global_step/sec: 240.287
INFO:tensorflow:loss = 1164.21, step = 14602 (0.385 sec)
INFO:tensorflow:loss = 1569.89, step = 14702 (0.395 sec)
INFO:tensorflow:loss = 1914.09, step = 14802 (0.421 sec)
INFO:tensorflow:loss = 1635.57, step = 14902 (0.421 sec)
INFO:tensorflow:loss = 1091.0, step = 15002 (0.393 sec)
INFO:tensorflow:loss = 1467.56, step = 15102 (0.392 sec)
INFO:tensorflow:Saving checkpoints for 15102 into trained_models/housing-price-model-01/model.ckpt.
INFO:tensorflow:Loss for final step: 1467.56.

* data input_fn:
================
Input file(s): data/housing-train-01.csv
Batch size: 177
Epoch Count: 1
Mode: eval
Thread Count: 4
Shuffle: False
================

INFO:tensorflow:Starting evaluation at 2017-12-19-11:33:59
INFO:tensorflow:Restoring parameters from trained_models/housing-price-model-01/model.ckpt-15102
INFO:tensorflow:Finished evaluation at 2017-12-19-11:34:00
INFO:tensorflow:Saving dict for global step 15102: average_loss = 8.2391, global_step = 15102, loss = 1458.32

* data input_fn:
================
Input file(s): data/housing-train-01.csv
Batch size: 177
Epoch Count: 10000
Mode: train
Thread Count: 4
Shuffle: True
================

INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Restoring parameters from trained_models/housing-price-model-01/model.ckpt-15102
INFO:tensorflow:Saving checkpoints for 15103 into trained_models/housing-price-model-01/model.ckpt.
INFO:tensorflow:loss = 1403.81, step = 15103
INFO:tensorflow:loss = 1587.19, step = 15203 (0.812 sec)
INFO:tensorflow:loss = 1286.68, step = 15303 (0.381 sec)
INFO:tensorflow:loss = 1637.38, step = 15403 (0.402 sec)
INFO:tensorflow:loss = 1329.74, step = 15503 (0.398 sec)
INFO:tensorflow:loss = 1772.51, step = 15603 (0.511 sec)
INFO:tensorflow:loss = 1407.55, step = 15703 (0.384 sec)
INFO:tensorflow:loss = 1180.68, step = 15803 (0.393 sec)
INFO:tensorflow:loss = 1770.34, step = 15903 (0.392 sec)
INFO:tensorflow:loss = 1735.86, step = 16003 (0.415 sec)
INFO:tensorflow:global_step/sec: 223.346
INFO:tensorflow:loss = 1479.24, step = 16103 (0.390 sec)
INFO:tensorflow:loss = 1814.01, step = 16203 (0.384 sec)
INFO:tensorflow:loss = 1635.08, step = 16303 (0.388 sec)
INFO:tensorflow:loss = 1606.41, step = 16403 (0.535 sec)
INFO:tensorflow:loss = 1496.59, step = 16503 (0.382 sec)
INFO:tensorflow:loss = 938.824, step = 16603 (0.394 sec)
INFO:tensorflow:loss = 1169.67, step = 16703 (0.379 sec)
INFO:tensorflow:loss = 1377.32, step = 16803 (0.399 sec)
INFO:tensorflow:loss = 1316.57, step = 16903 (0.405 sec)
INFO:tensorflow:loss = 1477.35, step = 17003 (0.511 sec)
INFO:tensorflow:global_step/sec: 238.949
INFO:tensorflow:loss = 1545.88, step = 17103 (0.409 sec)
INFO:tensorflow:loss = 1433.12, step = 17203 (0.418 sec)
INFO:tensorflow:loss = 1091.55, step = 17303 (0.402 sec)
INFO:tensorflow:loss = 1429.21, step = 17403 (0.412 sec)
INFO:tensorflow:loss = 1549.26, step = 17503 (0.400 sec)
INFO:tensorflow:loss = 1631.92, step = 17603 (0.408 sec)
INFO:tensorflow:loss = 1302.46, step = 17703 (0.421 sec)
INFO:tensorflow:loss = 1352.51, step = 17803 (0.409 sec)
INFO:tensorflow:loss = 1167.69, step = 17903 (0.401 sec)
INFO:tensorflow:loss = 1605.68, step = 18003 (0.396 sec)
INFO:tensorflow:global_step/sec: 244.564
INFO:tensorflow:loss = 1693.61, step = 18103 (0.423 sec)
INFO:tensorflow:loss = 1455.94, step = 18203 (0.420 sec)
INFO:tensorflow:loss = 1241.86, step = 18303 (0.382 sec)
INFO:tensorflow:loss = 1726.32, step = 18403 (0.382 sec)
INFO:tensorflow:loss = 1224.85, step = 18503 (0.383 sec)
INFO:tensorflow:loss = 1467.28, step = 18603 (0.400 sec)
INFO:tensorflow:loss = 1401.62, step = 18703 (0.382 sec)
INFO:tensorflow:loss = 1363.92, step = 18803 (0.388 sec)
INFO:tensorflow:loss = 1490.52, step = 18903 (0.388 sec)
INFO:tensorflow:loss = 1466.06, step = 19003 (0.387 sec)
INFO:tensorflow:global_step/sec: 255.907
INFO:tensorflow:loss = 1178.05, step = 19103 (0.392 sec)
INFO:tensorflow:loss = 1413.14, step = 19203 (0.393 sec)
INFO:tensorflow:loss = 1232.06, step = 19303 (0.387 sec)
INFO:tensorflow:loss = 1298.45, step = 19403 (0.394 sec)
INFO:tensorflow:loss = 1302.67, step = 19503 (0.412 sec)
INFO:tensorflow:loss = 1113.79, step = 19603 (0.403 sec)
INFO:tensorflow:loss = 1643.6, step = 19703 (0.385 sec)
INFO:tensorflow:loss = 1404.25, step = 19803 (0.380 sec)
INFO:tensorflow:loss = 1390.34, step = 19903 (0.383 sec)
INFO:tensorflow:Saving checkpoints for 20000 into trained_models/housing-price-model-01/model.ckpt.
INFO:tensorflow:Loss for final step: 1468.92.

* data input_fn:
================
Input file(s): data/housing-train-01.csv
Batch size: 177
Epoch Count: 1
Mode: eval
Thread Count: 4
Shuffle: False
================

INFO:tensorflow:Starting evaluation at 2017-12-19-11:34:36
INFO:tensorflow:Restoring parameters from trained_models/housing-price-model-01/model.ckpt-20000
INFO:tensorflow:Finished evaluation at 2017-12-19-11:34:37
INFO:tensorflow:Saving dict for global step 20000: average_loss = 7.49497, global_step = 20000, loss = 1326.61
.......................................
Experiment finished at 11:34:37

Experiment elapsed time: 153.405469 seconds

Evaluate the Model


In [21]:
train_input_fn = lambda: csv_input_fn(files_name_pattern= TRAIN_DATA_FILES_PATTERN, 
                                      mode= tf.estimator.ModeKeys.EVAL,
                                      batch_size= TRAIN_DATA_SIZE)


test_input_fn = lambda: csv_input_fn(files_name_pattern= TEST_DATA_FILES_PATTERN, 
                                      mode= tf.estimator.ModeKeys.EVAL,
                                      batch_size= TEST_DATA_SIZE)

estimator = create_DNNComb_estimator(run_config, hparams)

train_results = estimator.evaluate(input_fn=train_input_fn, steps=1)
train_rmse = round(math.sqrt(train_results["average_loss"]),5)
print()
print("############################################################################################")
print("# Train RMSE: {} - {}".format(train_rmse, train_results))
print("############################################################################################")

test_results = estimator.evaluate(input_fn=test_input_fn, steps=1)
test_rmse = round(math.sqrt(test_results["average_loss"]),5)
print()
print("############################################################################################")
print("# Test RMSE: {} - {}".format(test_rmse, test_results))
print("############################################################################################")


INFO:tensorflow:Using config: {'_model_dir': 'trained_models/housing-price-model-01', '_tf_random_seed': 19830610, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 1000, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x11c2560f0>, '_task_type': 'worker', '_task_id': 0, '_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}

* data input_fn:
================
Input file(s): data/housing-train-01.csv
Batch size: 354
Epoch Count: None
Mode: eval
Thread Count: 4
Shuffle: False
================

INFO:tensorflow:Starting evaluation at 2017-12-19-11:34:40
INFO:tensorflow:Restoring parameters from trained_models/housing-price-model-01/model.ckpt-20000
INFO:tensorflow:Evaluation [1/1]
INFO:tensorflow:Finished evaluation at 2017-12-19-11:34:41
INFO:tensorflow:Saving dict for global step 20000: average_loss = 7.49497, global_step = 20000, loss = 2653.22

############################################################################################
# Train RMSE: 2.73769 - {'average_loss': 7.4949684, 'loss': 2653.2188, 'global_step': 20000}
############################################################################################

* data input_fn:
================
Input file(s): data/housing-test-01.csv
Batch size: 152
Epoch Count: None
Mode: eval
Thread Count: 4
Shuffle: False
================

INFO:tensorflow:Starting evaluation at 2017-12-19-11:34:43
INFO:tensorflow:Restoring parameters from trained_models/housing-price-model-01/model.ckpt-20000
INFO:tensorflow:Evaluation [1/1]
INFO:tensorflow:Finished evaluation at 2017-12-19-11:34:45
INFO:tensorflow:Saving dict for global step 20000: average_loss = 15.905, global_step = 20000, loss = 2417.56

############################################################################################
# Test RMSE: 3.98811 - {'average_loss': 15.904994, 'loss': 2417.5591, 'global_step': 20000}
############################################################################################
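
Both figures comfortably beat the naive mean-predictor baseline computed earlier (train RMSE 8.958, test RMSE 9.714). The gap between train RMSE (2.74) and test RMSE (3.99) suggests some overfitting, which is unsurprising after 20,000 steps on 354 training examples.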

Prediction


In [22]:
import itertools

predict_input_fn = lambda: csv_input_fn(files_name_pattern= TEST_DATA_FILES_PATTERN, 
                                      mode= tf.estimator.ModeKeys.PREDICT,
                                      batch_size= 5)

predictions = estimator.predict(input_fn=predict_input_fn)
values = list(map(lambda item: item["predictions"][0],list(itertools.islice(predictions, 5))))
print()
print("Predicted Values: {}".format(values))


* data input_fn:
================
Input file(s): data/housing-test-01.csv
Batch size: 5
Epoch Count: None
Mode: infer
Thread Count: 4
Shuffle: False
================

WARNING:tensorflow:Input graph does not contain a QueueRunner. That means predict yields forever. This is probably a mistake.
INFO:tensorflow:Restoring parameters from trained_models/housing-price-model-01/model.ckpt-20000

Predicted Values: [31.980909, 31.160217, 19.675268, 19.495461, 20.486834]
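
To put these five predictions in context (an addition, not part of the recorded run), the matching actual MEDV values can be read straight from the head of the test set; csv_input_fn does not shuffle outside TRAIN mode, so row order is preserved:

print("Actual MEDV values: {}".format(list(test_data.MEDV.head(5).values)))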